-------------------------------------------------------------------------
-- H264 top level (simulation) - VHDL
-- 
-- Written by Andy Henson
-- Copyright (c) 2008 Zexia Access Ltd
-- All rights reserved.
--
-- Redistribution and use in source and binary forms, with or without
-- modification, are permitted provided that the following conditions are met:
--    * Redistributions of source code must retain the above copyright
--      notice, this list of conditions and the following disclaimer.
--    * Redistributions in binary form must reproduce the above copyright
--      notice, this list of conditions and the following disclaimer in the
--      documentation and/or other materials provided with the distribution.
--    * Neither the name of the Zexia Access Ltd nor the
--      names of its contributors may be used to endorse or promote products
--      derived from this software without specific prior written permission.
--
-- THIS SOFTWARE IS PROVIDED BY ZEXIA ACCESS LTD ``AS IS'' AND ANY
-- EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-- DISCLAIMED. IN NO EVENT SHALL ZEXIA ACCESS LTD OR ANDY HENSON BE LIABLE FOR ANY
-- DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-- ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-------------------------------------------------------------------------

-- This is an example top level module for the H264 submodules.
-- Each implementation will differ at the top level due to differing
-- number of video streams, resolution, and RAM type and interface.

-- The intention is to provide a clear example of how to interface the
-- other modules rather than a complex generic synthesizable top level.
-- This module is not intended for synthesis but for simulation, but much
-- of the component instantiation is generic and can be reused in a real
-- synthesizable component.  The image buffer uses VHDL arrays here, but in
-- a real system external RAM may be required.

-- Generic design: (please see pdf file with nice block diagram)
-- There are two almost independent dataflows here, the predict/quantise
-- loop with dequantise reconstruct feedback to next prediction runs at
-- the front, outputting to xbuffer and header modules.
-- Once a macroblock has completed in the predict/quantise loop, the cavlc
-- backend takes the data: the header module outputs the header and
-- the xbuffer module pumps stuff through cavlc module and both end up
-- via tobytes.  This proceeds as the next macroblock is being processed
-- in predict/quantise.
-- At the end of a line, we wait for DONE asserted by xbuffer to say
-- all quiescent (neither front end nor back end busy; although there
-- is still data being clocked out via cavlc for another 20 clocks or so
-- and tobytes for another up to 100 clocks depending on size of fifo).

-- All this is regulated by a number of READY lines which pause earlier stages
-- if needed.  tobytes and cavlc pause the xbuffer pump (but not until the end
-- of the current submb - up to 16 clks).  xbuffer can pause prediction.  And
-- the prediction controls the feed of data in from image buffers.  To overcome
-- the "floppiness" of the feedback (up to 40 clks), there is a
-- fifo in tobytes as well as the ram in xbuffer module.

-- QP: note there is only a single QPvalue used here, really there should be
-- a separate one for chroma for QP>=30 or chroma_qp_index_offset/=0 (in PP).
-- latch either QPy or QPc on entry to coretransform, latch on entry to quantise
-- and dequantise when enable low.  That'll work.

library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.STD_LOGIC_ARITH.ALL;
use IEEE.STD_LOGIC_UNSIGNED.ALL;
use ieee.numeric_std.ALL;
use ieee.math_real.ALL;
use std.textio.all;
use work.h264.all;
use work.misc.all;

--pre-headers (suggested)
--SPS: 00 00 00 01 ; 67 42 00 28 da 05 82 59 	-- contains size of image 352x288
--PPS: 00 00 00 01 ; 68 ce 38 80				-- default zero params
--Slice: 00 00 00 01 (rest of slice is generated by h264header).

-- Simulation-only top level: the entity has no ports or generics; the clocks,
-- the input stimulus and the checks are all produced inside the architecture.
entity h264top is
end h264top;

architecture sim of h264top is
	-- Simulation switches.  verbose/verbose2/verbose3 gate the trace processes;
	-- verbosep gates the progress messages printed by the input process.
	constant verbose : boolean := false;	--dump all output bits and states
	constant verbose2 : boolean := false;	--dump residuals, coeffs, etc
	constant verbose3 : boolean := false;	--dc checks; dump dc coeffs
	constant verbosep : boolean := true;	--progress indicator "newline pulsed"
	constant dumprecon : boolean := true;	--dump reconstructed image to file
	constant computesnr : boolean := true;	--compute and print SNR figures
	--
	-- Image geometry.  The input process feeds luma in 16-pixel-wide
	-- macroblocks and tests x=IMGWIDTH for end of line, so IMGWIDTH is
	-- expected to be a multiple of 16.
	--constant IMGWIDTH : integer := 4000;
	--constant IMGHEIGHT : integer := 2672;
	--constant IMGSKIP : integer := 8;
	constant IMGWIDTH : integer := 352;	--sample stuff is 352x288
	constant IMGHEIGHT : integer := 288;
	--constant IMGWIDTH : integer := 320;
	--constant IMGHEIGHT : integer := 128;
	constant IMGSKIP : integer := 0;		--amount to skip between lines (usually 0)
	constant MAXFRAMES : integer := 300;	--number of frames to process
	constant INITQP : integer := 28;	--0..51
	-- With MAXQP = INITQP the "reuse frame with QP+1" branch of the input
	-- process is never taken; raise MAXQP above INITQP to sweep QP.
	constant MAXQP : integer := INITQP;
	-- Also used as the width of the mbx/mbxcc macroblock x counters.
	constant IWBITS : integer := 9;		--bits required for IMGWIDTH
	--
	-- Raw input frame storage: one character per sample, indexed (x, y).
	-- The chroma planes are half width and half height of the luma plane.
	type bytefile is file of character;
	type Tvideoframe is array (0 to IMGWIDTH-1, 0 to IMGHEIGHT-1) of character;
	type Tchromaframe is array (0 to IMGWIDTH/2-1, 0 to IMGHEIGHT/2-1) of character;
	--
	-- Filled by the input process before each frame is pumped into the predictors.
	shared variable yvideo : Tvideoframe;
	shared variable uvideo : Tchromaframe;
	shared variable vvideo : Tchromaframe;
	--
	-- NOTE(review): in the visible part of the file slowready only appears in a
	-- commented-out term of intra4x4_readyo and slowcount is not referenced.
	signal slowready :std_logic := '1';	--to run it slowly, keep this sometimes at 0
	signal slowcount :integer := 0;
	--signal iiy :integer := 0;--debug
	--signal ccy :integer := 0;--debug
	--
	-- Both clocks are driven by the clock generator process; CLK toggles on
	-- every rising edge of CLK2.
	signal CLK : std_logic := '0';			--clock
	signal CLK2 : std_logic;				--2x clock
	-- THIS SECTION IS SKELETON FOR HARDWARE SYNTH...
	--
	-- Signals are named <instance>_<PORT> after the instance they attach to.
	signal top_NEWSLICE : std_logic := '1';			--reset: this is the first in a slice
	signal top_NEWLINE : std_logic := '0';			--newline: first mb and submb
	signal intra4x4_READYI : std_logic := '0';				--ready for enable when this set
	signal intra4x4_STROBEI : std_logic := '0';				--values transferred only when this is 1
	signal intra4x4_DATAI : std_logic_vector(31 downto 0) := (others => '0');
	signal intra4x4_TOPI : std_logic_vector(31 downto 0) := (others => '0');
	signal intra4x4_TOPMI : std_logic_vector(3 downto 0) := (others => '0');
	signal intra4x4_STROBEO : std_logic := '0';				--values transferred out when this is 1
	signal intra4x4_READYO : std_logic := '0';				--when ready for out
	signal intra4x4_DATAO : std_logic_vector(35 downto 0) := (others => '0');
	signal intra4x4_BASEO : std_logic_vector(31 downto 0) := (others => '0');
	signal intra4x4_MSTROBEO : std_logic := '0';			--mode transferred only when this is 1
	signal intra4x4_MODEO : std_logic_vector(3 downto 0) := (others => '0');	--0..8 prediction type
	signal intra4x4_PMODEO : std_logic := '0';	--prediction type same
	signal intra4x4_RMODEO : std_logic_vector(2 downto 0) := (others => '0');	--prediction type rem
	signal intra4x4_XXO : std_logic_vector(1 downto 0) := (others => '0');
	signal intra4x4_XXINC : std_logic := '0';
	signal intra4x4_CHREADY : std_logic := '0';	--links intra4x4 CHREADY to intra8x8cc READYO
	--
	signal intra8x8cc_readyi : std_logic := '0';				--ready for enable when this set
	signal intra8x8cc_strobei : std_logic := '0';				--values transferred only when this is 1
	signal intra8x8cc_datai : std_logic_vector(31 downto 0) := (others => '0');
	signal intra8x8cc_TOPI : std_logic_vector(31 downto 0) := (others => '0');
	signal intra8x8cc_STROBEO : std_logic := '0';				--values transferred out when this is 1
	signal intra8x8cc_READYO : std_logic := '0';				--when ready for out
	signal intra8x8cc_DATAO : std_logic_vector(35 downto 0) := (others => '0');
	signal intra8x8cc_BASEO : std_logic_vector(31 downto 0) := (others => '0');
	signal intra8x8cc_dcstrobeo : std_logic := '0';				--DC strobe out: drives dctransform ENABLE
	signal intra8x8cc_dcdatao : std_logic_vector(15 downto 0) := (others => '0');	--DC data out: drives dctransform XXIN
	signal intra8x8cc_CMODEO : std_logic_vector(1 downto 0) := (others => '0');	--chroma prediction mode out (2 bits) - presumably intra_chroma_pred_mode; confirm
	signal intra8x8cc_XXO : std_logic_vector(1 downto 0) := (others => '0');
	signal intra8x8cc_XXC : std_logic := '0';
	signal intra8x8cc_XXINC : std_logic := '0';
	--
	-- NOTE(review): header_CMODE is not driven anywhere in the visible part of
	-- the file, so it stays at its initial "00"; confirm that intra8x8cc_CMODEO
	-- does not need to be connected to it.
	signal header_CMODE : std_logic_vector(1 downto 0) := b"00";	--intra_chroma_pred_mode
	signal header_VE : std_logic_vector(19 downto 0) := (others=>'0');
	signal header_VL : std_logic_vector(4 downto 0) := (others=>'0');
	signal header_VALID : std_logic := '0';	-- VE/VL valid
	--
	signal coretransform_READY : std_logic := '0';				--ready for enable when this set
	signal coretransform_ENABLE : std_logic := '0';				--values transferred only when this is 1
	signal coretransform_XXIN : std_logic_vector(35 downto 0) := (others => '0');
	signal coretransform_valid : std_logic := '0';
	signal coretransform_ynout : std_logic_vector(13 downto 0);
	--
	signal dctransform_VALID : std_logic := '0';
	signal dctransform_yyout : std_logic_vector(15 downto 0);
	signal dctransform_readyo : std_logic := '0';
	--
	signal quantise_ENABLE : std_logic := '0';
	signal quantise_YNIN : std_logic_vector(15 downto 0);
	signal quantise_valid : std_logic := '0';
	signal quantise_zout : std_logic_vector(11 downto 0);
	signal quantise_dcco : std_logic := '0';
	--
	signal dequantise_enable : std_logic := '0';
	signal dequantise_zin : std_logic_vector(15 downto 0);
	signal dequantise_last : std_logic := '0';
	signal dequantise_valid : std_logic := '0';
	signal dequantise_dcco : std_logic := '0';	--only referenced by a commented-out port association
	signal dequantise_wout : std_logic_vector(15 downto 0);
	--
	signal invdctransform_enable : std_logic := '0';
	signal invdctransform_zin : std_logic_vector(15 downto 0);
	signal invdctransform_valid : std_logic := '0';
	signal invdctransform_yyout : std_logic_vector(15 downto 0);
	signal invdctransform_ready : std_logic := '0';
	--
	signal invtransform_valid : std_logic := '0';
	signal invtransform_xout : std_logic_vector(39 downto 0);
	--
	signal recon_BSTROBEI : std_logic := '0';				--values transferred only when this is 1
	signal recon_basei : std_logic_vector(31 downto 0) := (others => '0');
	signal recon_FBSTROBE : std_logic := '0';				--luma feedback transferred only when this is 1
	signal recon_FBCSTROBE : std_logic := '0';				--chroma feedback transferred only when this is 1
	signal recon_FEEDB : std_logic_vector(31 downto 0) := (others => '0');
	--
	signal xbuffer_NLOAD : std_logic := '0';		--load for CAVLC NOUT
	signal xbuffer_NX : std_logic_vector(2 downto 0);	--X value for NIN/NOUT
	signal xbuffer_NY : std_logic_vector(2 downto 0);	--Y value for NIN/NOUT
	signal xbuffer_NV : std_logic_vector(1 downto 0);	--valid flags for NIN/NOUT (1=left, 2=top, 3=avg)
	signal xbuffer_NXINC : std_logic := '0';		--increment for X macroblock counter
	signal xbuffer_READYI : std_logic := '0';
	signal xbuffer_CCIN : std_logic := '0';
	signal xbuffer_DONE : std_logic := '0';
	--
	signal cavlc_ENABLE : std_logic := '0';				--values transferred only when this is 1
	signal cavlc_READY : std_logic;				--connects cavlc READY to xbuffer READYO
	signal cavlc_VIN : std_logic_vector(11 downto 0) := x"000";		--12bits max (+/- 2048)
	signal cavlc_NIN : std_logic_vector(4 downto 0) :=b"00000";	--N coeffs nearby mb
	signal cavlc_VE : std_logic_vector(24 downto 0) := (others=>'0');
	signal cavlc_VL : std_logic_vector(4 downto 0) := (others=>'0');
	signal cavlc_VALID : std_logic := '0';	-- enable delayed to same as VE/VL
	signal cavlc_XSTATE : std_logic_vector(2 downto 0) := (others=>'0');
	signal cavlc_NOUT : std_logic_vector(4 downto 0);
	--
	signal tobytes_READY : std_logic;					--soft "ready" flag
	signal tobytes_VE : std_logic_vector(24 downto 0) := (others=>'0');
	signal tobytes_VL : std_logic_vector(4 downto 0) := (others=>'0');
	signal tobytes_VALID : std_logic := '0';	-- VE/VL valid
	signal tobytes_BYTE: std_logic_vector(7 downto 0) := (others=>'0');
	signal tobytes_STROBE : std_logic := '0';	-- BYTE valid
	signal tobytes_DONE : std_logic := '0';		-- NAL all done
	--
	signal align_VALID : std_logic := '0';	--pulsed by the input process to push the align pattern into tobytes
	signal QP : std_logic_vector(5 downto 0) := CONV_STD_LOGIC_VECTOR(INITQP,6);
	--
	signal ninx : std_logic_vector(7 downto 0) := x"00";	--macroblock x counter for nintop addressing (cleared on NEWLINE, stepped by NXINC)
	signal ninl : std_logic_vector(4 downto 0) := b"00000";	--N coeffs nearby mb: left
	signal nint : std_logic_vector(4 downto 0) := b"00000";	--N coeffs nearby mb: top
	signal ninsum : std_logic_vector(5 downto 0) := b"000000";	--ninl + nint + 1; bits 5..1 give the rounded average
	type Tnin is array (natural range <>) of std_logic_vector(4 downto 0);
	signal ninleft : Tnin(7 downto 0) := (others=>(others=>'0'));	--indexed by xbuffer_NY
	signal nintop : Tnin(2047 downto 0) := (others=>(others=>'0'));	--macroblocks*8; indexed by ninx & xbuffer_NX
	--
	type Tfullrow is array (0 to IMGWIDTH-1) of std_logic_vector(31 downto 0);
	type Tfullrowm is array (0 to IMGWIDTH-1) of std_logic_vector(3 downto 0);
	signal toppix : Tfullrow := (others=>(others=>'0'));	--actually units of 4 pixels
	signal toppixcc : Tfullrow := (others=>(others=>'0'));	--actually units of 4 pixels
	signal topmode : Tfullrowm := (others=>x"0");	--stored intra4x4_MODEO values, same indexing as toppix
	signal mbx : std_logic_vector(IWBITS-1 downto 0) := (others=>'0');	--macroblock x counter
	signal mbxcc : std_logic_vector(IWBITS-1 downto 0) := (others=>'0'); --macroblock x counter for chroma
	--
begin
	-- Luma intra 4x4 predictor and its glue logic.
	-- READYO is the AND of the core transform and xbuffer ready flags.
	intra4x4_READYO <= xbuffer_READYI and coretransform_READY;-- and slowready;
	-- Top-row pixels and modes are looked up with macroblock x (mbx) as the
	-- upper index bits and the predictor's XXO as the lower two bits.
	intra4x4_TOPMI <= topmode(conv_integer(mbx & intra4x4_XXO));
	intra4x4_TOPI <= toppix(conv_integer(mbx & intra4x4_XXO));
	--
	intra4x4 : h264intra4x4
	port map (
		CLK      => CLK2,
		NEWSLICE => top_NEWSLICE,
		NEWLINE  => top_NEWLINE,
		-- pixel input (from the stimulus process)
		READYI   => intra4x4_READYI,
		STROBEI  => intra4x4_STROBEI,
		DATAI    => intra4x4_DATAI,
		-- top-row lookup
		XXO      => intra4x4_XXO,
		XXINC    => intra4x4_XXINC,
		TOPI     => intra4x4_TOPI,
		TOPMI    => intra4x4_TOPMI,
		-- reconstruction feedback (top byte of the feedback word)
		FBSTROBE => recon_FBSTROBE,
		FEEDBI   => recon_FEEDB(31 downto 24),
		-- residual / base output
		READYO   => intra4x4_READYO,
		STROBEO  => intra4x4_STROBEO,
		DATAO    => intra4x4_DATAO,
		BASEO    => intra4x4_BASEO,
		-- prediction mode output
		MSTROBEO => intra4x4_MSTROBEO,
		MODEO    => intra4x4_MODEO,
		PMODEO   => intra4x4_PMODEO,
		RMODEO   => intra4x4_RMODEO,
		-- link to the chroma predictor
		CHREADY  => intra4x4_CHREADY
	);
	--
	-- Chroma intra 8x8 predictor.  Its top-row word is looked up with the
	-- chroma macroblock counter (mbxcc) and the predictor's XXO.
	intra8x8cc_TOPI <= toppixcc(conv_integer(mbxcc & intra8x8cc_XXO));
	--
	intra8x8cc : h264intra8x8cc
	port map (
		CLK2      => CLK2,
		NEWSLICE  => top_NEWSLICE,
		NEWLINE   => top_NEWLINE,
		-- pixel input (from the stimulus process)
		READYI    => intra8x8cc_READYI,
		STROBEI   => intra8x8cc_STROBEI,
		DATAI     => intra8x8cc_DATAI,
		-- top-row lookup
		XXO       => intra8x8cc_XXO,
		XXC       => intra8x8cc_XXC,
		XXINC     => intra8x8cc_XXINC,
		TOPI      => intra8x8cc_TOPI,
		-- chroma reconstruction feedback (top byte of the feedback word)
		FBSTROBE  => recon_FBCSTROBE,
		FEEDBI    => recon_FEEDB(31 downto 24),
		-- residual / base output; READYO is tied to intra4x4 CHREADY
		READYO    => intra4x4_CHREADY,
		STROBEO   => intra8x8cc_STROBEO,
		DATAO     => intra8x8cc_DATAO,
		BASEO     => intra8x8cc_BASEO,
		-- DC output and chroma mode
		DCSTROBEO => intra8x8cc_dcstrobeo,
		DCDATAO   => intra8x8cc_dcdatao,
		CMODEO    => intra8x8cc_CMODEO
	);
	--
	-- Slice/macroblock header generator (runs on the slower CLK).
	-- Everything is intra in this test, so the inter-prediction inputs are
	-- tied to zero.
	header : h264header
	port map (
		CLK      => CLK,
		NEWSLICE => top_NEWSLICE,
		--LASTSLICE => '1'
		QP       => QP,
		--
		SINTRA   => '1',	--all slices are Intra in this test
		MINTRA   => '1',	--ditto all mbs
		--
		-- both strobes are taken from the luma predictor output strobe
		LSTROBE  => intra4x4_STROBEO,
		CSTROBE  => intra4x4_STROBEO, --header_cstrobe,
		--
		-- prediction modes
		PMODE    => intra4x4_PMODEO,
		RMODE    => intra4x4_RMODEO,
		CMODE    => header_CMODE,
		--
		-- unused inter-prediction fields
		PTYPE    => b"00",
		PSUBTYPE => b"00",
		MVDX     => x"000",
		MVDY     => x"000",
		--
		-- bitstream output
		VALID    => header_VALID,
		VL       => header_VL,
		VE       => header_VE
	);
	--
	-- Core transform input mux: the luma predictor output is selected while
	-- its strobe is '1', otherwise the chroma predictor output is passed on.
	-- The same selection feeds the base pixels to the reconstruct module.
	coretransform_ENABLE <= intra8x8cc_STROBEO or intra4x4_STROBEO;
	coretransform_XXIN <= intra8x8cc_DATAO when intra4x4_STROBEO/='1' else intra4x4_DATAO;
	recon_BSTROBEI <= intra8x8cc_STROBEO or intra4x4_STROBEO;
	recon_basei <= intra8x8cc_BASEO when intra4x4_STROBEO/='1' else intra4x4_BASEO;
	--
	coretransform : h264coretransform
	port map (
		CLK    => CLK2,
		-- input side
		READY  => coretransform_READY,
		ENABLE => coretransform_ENABLE,
		XXIN   => coretransform_XXIN,
		-- output side
		VALID  => coretransform_valid,
		YNOUT  => coretransform_ynout
	);
	--
	-- Forward DC transform, fed by the chroma predictor's DC outputs.
	-- READYO is high only when intra4x4_CHREADY is set and the core transform
	-- is not currently presenting valid output.
	dctransform_readyo <= (not coretransform_valid) and intra4x4_CHREADY;
	--
	dctransform : h264dctransform
	generic map ( TOGETHER => 1 )
	port map (
		CLK2   => CLK2,
		RESET  => top_NEWSLICE,
		--READYI => 
		-- input side
		ENABLE => intra8x8cc_dcstrobeo,
		XXIN   => intra8x8cc_dcdatao,
		-- output side
		READYO => dctransform_readyo,
		VALID  => dctransform_VALID,
		YYOUT  => dctransform_yyout
	);
	--
	-- Quantiser input mux: the sign-extended core transform output is used
	-- while coretransform_valid is '1', otherwise the DC transform output.
	quantise_ENABLE <= dctransform_VALID or coretransform_valid;
	quantise_YNIN <= dctransform_yyout when coretransform_valid/='1' else sxt(coretransform_ynout,16);
	--
	quantise : h264quantise
	port map (
		CLK    => CLK2,
		QP     => QP,
		-- input side
		ENABLE => quantise_ENABLE,
		DCCI   => dctransform_VALID,
		YNIN   => quantise_YNIN,
		-- output side
		VALID  => quantise_valid,
		DCCO   => quantise_dcco,
		ZOUT   => quantise_zout
	);
	--
	-- Inverse DC transform: a second h264dctransform instance (default
	-- generics) that takes the quantiser output when the quantiser flags it
	-- with DCCO.
	invdctransform_zin <= sxt(quantise_zout,16);
	invdctransform_enable <= quantise_dcco and quantise_valid;
	invdctransform_ready <= xbuffer_CCIN and dequantise_last;
	--
	invdctransform : h264dctransform
	port map (
		CLK2   => CLK2,
		RESET  => top_NEWSLICE,
		--READYI => 
		-- input side
		ENABLE => invdctransform_enable,
		XXIN   => invdctransform_zin,
		-- output side
		READYO => invdctransform_ready,
		VALID  => invdctransform_valid,
		YYOUT  => invdctransform_yyout
	);
	--
	-- Dequantiser input mux: quantiser output not flagged by DCCO enables it
	-- directly; its data input switches to the inverse DC transform output
	-- unless invdctransform_valid is '0'.
	dequantise_enable <= (not quantise_dcco) and quantise_valid;
	dequantise_zin <= invdctransform_yyout when invdctransform_valid/='0' else sxt(quantise_zout,16);
	--
	dequantise : h264dequantise
	generic map ( LASTADVANCE => 2 )
	port map (
		CLK    => CLK2,
		QP     => QP,
		-- input side
		ENABLE => dequantise_enable,
		DCCI   => invdctransform_valid,
		ZIN    => dequantise_zin,
		-- output side
		LAST   => dequantise_last,
		--DCCO => dequantise_dcco,
		VALID  => dequantise_valid,
		WOUT   => dequantise_wout
	);
	--
	-- Inverse core transform: takes the dequantiser output directly and
	-- drives the reconstruct module.
	invtransform : h264invtransform
	port map (
		CLK    => CLK2,
		-- from dequantise
		ENABLE => dequantise_valid,
		WIN    => dequantise_wout,
		-- to recon
		VALID  => invtransform_valid,
		XOUT   => invtransform_xout
	);
	--
	-- Reconstruct: takes the inverse transform output together with the base
	-- pixels from the predictors and produces the feedback word used by the
	-- predictors and the top-row store.
	recon : h264recon
	port map (
		CLK2     => CLK2,
		NEWSLICE => top_NEWSLICE,
		-- residual input from invtransform
		STROBEI  => invtransform_valid,
		DATAI    => invtransform_xout,
		-- base pixels; BCHROMAI is the chroma predictor's output strobe
		BSTROBEI => recon_BSTROBEI,
		BCHROMAI => intra8x8cc_STROBEO,
		BASEI    => recon_basei,
		-- feedback output with separate luma/chroma strobes
		STROBEO  => recon_FBSTROBE,
		CSTROBEO => recon_FBCSTROBE,
		DATAO    => recon_FEEDB
	);
	--
	-- Coefficient buffer between the quantiser (front end) and cavlc (back
	-- end).  It also issues the NX/NY/NV/NLOAD controls used by the nin/nout
	-- process and the DONE flag the stimulus process waits on.
	xbuffer : h264buffer
	port map (
		CLK      => CLK2,
		NEWSLICE => top_NEWSLICE,
		NEWLINE  => top_NEWLINE,
		-- input from quantise
		READYI   => xbuffer_READYI,
		--DCREADYI => xbuffer_DCREADYI,
		VALIDI   => quantise_valid,
		ZIN      => quantise_zout,
		CCIN     => xbuffer_CCIN,
		DONE     => xbuffer_DONE,
		-- output to cavlc
		VALIDO   => cavlc_ENABLE,
		VOUT     => cavlc_VIN,
		-- neighbour-count addressing
		NLOAD    => xbuffer_NLOAD,
		NV       => xbuffer_NV,
		NX       => xbuffer_NX,
		NY       => xbuffer_NY,
		NXINC    => xbuffer_NXINC,
		-- flow control from the back end and header
		READYO   => cavlc_READY,
		TREADYO  => tobytes_READY,
		HVALID   => header_VALID
	);
	--
	-- CAVLC encoder: takes coefficients from xbuffer and the neighbour count
	-- (NIN), and produces VE/VL codes for tobytes.  Uses both clocks.
	cavlc : h264cavlc
	port map (
		CLK    => CLK,
		CLK2   => CLK2,
		-- coefficient input
		READY  => cavlc_READY,
		ENABLE => cavlc_ENABLE,
		VIN    => cavlc_VIN,
		NIN    => cavlc_NIN,
		SIN    => '0',
		-- code output
		--VS => cavlc_vs,
		VALID  => cavlc_VALID,
		VL     => cavlc_VL,
		VE     => cavlc_VE,
		-- state (for tracing) and coefficient count for neighbour storage
		XSTATE => cavlc_XSTATE,
		NOUT   => cavlc_NOUT
	);
	--
	-- Input mux for the byte packer.  Priority: header, then cavlc; when
	-- neither is valid the align pattern is presented (it is only taken when
	-- align_VALID raises tobytes_VALID).
	tobytes_VALID <= header_VALID or align_VALID or cavlc_VALID;
	tobytes_VL <= header_VL when header_VALID='1' else
					cavlc_VL when cavlc_VALID='1' else
					b"01000";			--8 bits (1 + 7 for align)
	tobytes_VE <= b"00000"&header_VE when header_VALID='1' else
					cavlc_VE when cavlc_VALID='1' else
					'0'&x"030080";		--align+done pattern
	--
	tobytes: h264tobytes
	port map (
		CLK    => CLK,
		-- code input
		READY  => tobytes_READY,
		VALID  => tobytes_VALID,
		VL     => tobytes_VL,
		VE     => tobytes_VE,
		-- byte output
		STROBE => tobytes_STROBE,
		BYTE   => tobytes_BYTE,
		DONE   => tobytes_DONE
	);
	--
-- Neighbour coefficient counts for CAVLC.
-- On NLOAD the count reported by cavlc (NOUT) is stored in both the left
-- column store and the top row store; otherwise the stored left/top counts
-- are read out.  ninx is the macroblock x counter used for the top store.
process(CLK2)	--nout/nin processing for CAVLC
	variable leftidx : integer;
	variable topidx : integer;
begin
	if rising_edge(CLK2) then
		leftidx := conv_integer(xbuffer_NY);
		topidx := conv_integer(ninx&xbuffer_NX);
		if xbuffer_NLOAD/='1' then
			--read stored neighbour counts
			ninl <= ninleft(leftidx);
			nint <= nintop(topidx);
		else
			--store the count just produced
			ninleft(leftidx) <= cavlc_NOUT;
			nintop(topidx) <= cavlc_NOUT;
		end if;
		--macroblock x counter
		if top_NEWLINE='1' then
			ninx <= (others => '0');
		elsif xbuffer_NXINC='1' then
			ninx <= ninx+1;
		end if;
	end if;
end process;
	--rounded-up sum; bits 5..1 are the average of left and top
	ninsum <= ('0'&ninl) + ('0'&nint) + 1;
	--select per xbuffer_NV: 1=left, 2=top, 3=average, else zero
	cavlc_NIN <=
		ninl when xbuffer_NV=1 else
		nint when xbuffer_NV=2 else
		ninsum(5 downto 1) when xbuffer_NV=3 else
		(others=>'0');
	--
-- Luma feedback: keeps the macroblock x counter (mbx) and stores the
-- reconstructed feedback word and the chosen prediction mode in the top-row
-- arrays at index mbx & XXO.
process(CLK2)	--feedback (luma)
begin
	if rising_edge(CLK2) then
		--macroblock x counter
		if top_NEWLINE='1' then
			mbx <= (others => '0');
		elsif intra4x4_XXINC='1' then
			mbx <= mbx + 1;
		end if;
		--set topmode
		if intra4x4_MSTROBEO='1' then
			topmode(conv_integer(mbx & intra4x4_XXO)) <= intra4x4_MODEO;
		end if;
		--set toppix
		if recon_FBSTROBE='1' then
			toppix(conv_integer(mbx & intra4x4_XXO)) <= recon_FEEDB;
		end if;
	end if;
end process;
-- Chroma feedback: same scheme using mbxcc and the chroma predictor's XXO.
process(CLK2)	--feedback (chroma)
begin
	if rising_edge(CLK2) then
		--chroma macroblock x counter
		if top_NEWLINE='1' then
			mbxcc <= (others => '0');
		elsif intra8x8cc_XXINC='1' then
			mbxcc <= mbxcc + 1;
		end if;
		--set toppixcc
		if recon_FBCSTROBE='1' then
			toppixcc(conv_integer(mbxcc & intra8x8cc_XXO)) <= recon_FEEDB;
		end if;
	end if;
end process;
	--
	-- FOR SIMULATION...
	--
-- Clock generator: CLK2 has a 10 ns period (100MHz); CLK toggles on every
-- rising edge of CLK2, giving 50MHz.
process		--generate CLK2, 100MHz will do for this sim, and CLK at 50MHz
	constant HALFPERIOD : time := 5 ns;
begin
	CLK2 <= '0';
	wait for HALFPERIOD;
	CLK <= not CLK;
	CLK2 <= '1';
	wait for HALFPERIOD;
end process;
	--
-- Stimulus process.  For each frame it:
--   1. reads one planar frame (Y, then U, then V) from the input file into
--      the shared frame buffers, or - if QP /= MAXQP and a frame has already
--      been read - bumps QP and re-encodes the frame already held;
--   2. asserts NEWSLICE/NEWLINE and pumps luma and chroma words into the
--      two predictors whenever they signal READYI;
--   3. waits for xbuffer_DONE, pushes the align pattern through tobytes by
--      pulsing align_VALID for one CLK, and waits for tobytes_DONE.
-- When the loop ends the simulation is stopped with a FAILURE assertion.
process		--input
	file inb : bytefile open read_mode is "akiyo_cif.yuv";
	variable c : character;
	variable sout : line;
	variable framenum : integer := 0;
	--luma position; note the read loops below declare their own x/y loop
	--parameters which hide these variables inside those loops
	variable x : integer;
	variable y : integer;
	--chroma position and plane selector (0 = U, otherwise V)
	variable cx : integer;
	variable cy : integer;
	variable cuv : integer;
begin
	mainlp: while (not endfile(inb) and framenum < MAXFRAMES) or QP < MAXQP loop
		if QP /= MAXQP and framenum>0 then
			--re-encode the frame already in the buffers with the next QP
			QP <= QP + 1;
			wait until rising_edge(CLK2);	--QP has its new value after this
			write(sout,string'("Reusing framenum: "));write(sout,framenum);
			write(sout,string'(".  Using QP: "));write(sout,conv_integer(QP));
			writeline(output,sout);
		else
			--a .yuv file has the y first (w x h) followed by the u (w/2 x h/2) then v (ditto).
			--so we read it all in before starting
			for y in 0 to IMGHEIGHT-1 loop
				for x in 0 to IMGWIDTH-1 loop
					read(inb,c);
					yvideo(x,y) := c;
				end loop;
				if IMGSKIP>0 then
					--discard padding at the end of each luma line
					for x in 1 to IMGSKIP loop
						read(inb,c);
					end loop;
				end if;
			end loop;
			for y in 0 to IMGHEIGHT/2-1 loop
				for x in 0 to IMGWIDTH/2-1 loop
					read(inb,c);
					uvideo(x,y) := c;
				end loop;
				if IMGSKIP>0 then
					--chroma lines carry half the padding
					for x in 1 to IMGSKIP/2 loop
						read(inb,c);
					end loop;
				end if;
			end loop;
			for y in 0 to IMGHEIGHT/2-1 loop
				for x in 0 to IMGWIDTH/2-1 loop
					read(inb,c);
					vvideo(x,y) := c;
				end loop;
				if IMGSKIP>0 then
					for x in 1 to IMGSKIP/2 loop
						read(inb,c);
					end loop;
				end if;
			end loop;
			--ok: read in image ok
			wait until rising_edge(CLK2);
			framenum := framenum+1;
			write(sout,string'("Framenum: "));write(sout,framenum);write(sout,string'(" read in ok"));
			write(sout,string'(".  Using QP: "));write(sout,conv_integer(QP));
			writeline(output,sout);
		end if;
		--
		--start of frame: flag new slice and new line, reset positions
		top_NEWLINE <= '1';
		top_NEWSLICE <= '1';
		x:=0;
		y:=0;
		cx := 0;
		cy := 0;
		cuv:= 0;
		wait until rising_edge(CLK2);
		--
		--now pump in the data as requested
 		while y < IMGHEIGHT or cy < IMGHEIGHT/2 loop
 			if top_NEWLINE='1' then
 				--while NEWLINE is set, hold chroma at the start of its macroblock row
 				--x := 0;
 				--y := y - (y mod 16);
 				cx := 0;
 				cy := cy - (cy mod 8);
 				cuv := 0;
 			end if;
			if intra4x4_READYI='1' and y < IMGHEIGHT then
				--luma: each handshake sends 2 rows of 16 pixels (4 words per row);
				--pixel x goes in bits 7..0 and pixel x+3 in bits 31..24
				wait until rising_edge(CLK2);	--simulate a little delay in data arriving
				intra4x4_STROBEI <= '1';
				top_NEWLINE <= '0';
				top_NEWSLICE <= '0';
				for j in 0 to 1 loop
					for i in 0 to 3 loop
						intra4x4_DATAI <=
							CONV_STD_LOGIC_VECTOR(character'POS(yvideo(x+3,y)),8) &
							CONV_STD_LOGIC_VECTOR(character'POS(yvideo(x+2,y)),8) &
							CONV_STD_LOGIC_VECTOR(character'POS(yvideo(x+1,y)),8) &
							CONV_STD_LOGIC_VECTOR(character'POS(yvideo(x,y)),8);
						wait until rising_edge(CLK2);
						x := x+4;
					end loop;
					x := x-16;		--back to start of line for next macroblock row
					y := y+1;
				end loop;
				intra4x4_STROBEI <= '0';
				if y mod 16 = 0 then	--completed macroblock
					x := x+16;
					y := y-16;			--next macroblock
					if x=IMGWIDTH then
						x := 0;			--next line
						y := y+16;
						--wait for 1 us;
						--end of macroblock row: wait for xbuffer to report DONE
						--before pulsing NEWLINE
						if xbuffer_DONE='0' then wait until xbuffer_DONE='1'; end if;
						top_NEWLINE <= '1';
						if verbosep then
							write(sout,string'("Newline pulsed, line "));write(sout,y);
							write(sout,string'(" ("));write(sout,y*100/IMGHEIGHT);write(sout,string'("%)"));
							writeline(output,sout);
						end if;
					end if;
				end if;
			end if;		-- intra4x4_READYI
			if intra8x8cc_READYI='1' and cy<IMGHEIGHT/2 then
				--chroma: each handshake sends 4 rows of 8 pixels (2 words per row),
				--taken from U when cuv=0 and from V otherwise
				wait until rising_edge(CLK2);	--simulate a little delay in data arriving
				intra8x8cc_STROBEI <= '1';
				for j in 0 to 3 loop
					for i in 0 to 1 loop
						if cuv=0 then
							intra8x8cc_DATAI <= 
								CONV_STD_LOGIC_VECTOR(character'POS(uvideo(cx+i*4+3,cy)),8) &
								CONV_STD_LOGIC_VECTOR(character'POS(uvideo(cx+i*4+2,cy)),8) &
								CONV_STD_LOGIC_VECTOR(character'POS(uvideo(cx+i*4+1,cy)),8) &
								CONV_STD_LOGIC_VECTOR(character'POS(uvideo(cx+i*4,cy)),8);
						else
							intra8x8cc_DATAI <= 
								CONV_STD_LOGIC_VECTOR(character'POS(vvideo(cx+i*4+3,cy)),8) &
								CONV_STD_LOGIC_VECTOR(character'POS(vvideo(cx+i*4+2,cy)),8) &
								CONV_STD_LOGIC_VECTOR(character'POS(vvideo(cx+i*4+1,cy)),8) &
								CONV_STD_LOGIC_VECTOR(character'POS(vvideo(cx+i*4,cy)),8);
						end if;
						wait until rising_edge(CLK2);
					end loop;
					cy := cy+1;
				end loop;
				intra8x8cc_STROBEI <= '0';
				if cy mod 8 = 0 then
					--8 rows sent: switch U -> V for the same block, or after V
					--move on to the next 8-pixel-wide chroma block
					if cuv=0 then
						cy := cy-8;
						cuv := 1;
					else
						cuv := 0;
						cy := cy-8;
						cx := cx+8;
						if cx=IMGWIDTH/2 then
							cx := 0;			--next line
							cy := cy+8;
						end if;
					end if;
				end if;
			end if;--intra8x8cc_READYI
			--iiy <= y;
			--ccy <= cy;
			wait until rising_edge(CLK2);
		end loop;
		if verbosep then
			write(sout,string'("Done push of data into intra4x4 and intra8x8cc"));
			writeline(output,sout);
		end if;
		--let the pipeline drain before terminating the NAL
		if xbuffer_DONE='0' then
			wait until xbuffer_DONE='1';
		end if;
		for w in 1 to 32 loop
			wait until rising_edge(CLK);
		end loop;
		wait until rising_edge(CLK);
		align_VALID <= '1';		--force ALIGN through (slower CLK rate)
		wait until rising_edge(CLK);
		align_VALID <= '0';
		wait until rising_edge(CLK);
		if verbosep then
			write(sout,string'("Done align at end of NAL"));
			writeline(output,sout);
		end if;
		if tobytes_DONE='0' then
			wait until tobytes_DONE='1';
		end if;
		wait until rising_edge(CLK);
		--write(sout,"Got DONE flag");
		--writeline(output,sout);
		wait until rising_edge(CLK);
		--
	end loop;
	write(sout,framenum);write(sout,string'(" frames processed."));
	writeline(output,sout);
	--deliberate FAILURE to stop the simulator once all frames are done
	assert FALSE report "DONE" severity FAILURE;
end process;
	--
-- Sanity checks sampled on every rising edge of CLK2: sources that share a
-- downstream input must never be valid together, and the value going into
-- cavlc must not contain unknowns.
process(CLK2)		--asserts
begin
	if rising_edge(CLK2) then
		--tobytes input: header vs cavlc
		assert header_VALID/='1' or cavlc_VALID/='1'
			report "two strobes clash" severity ERROR;
		--quantise input: core transform vs dc transform
		assert coretransform_valid/='1' or dctransform_VALID/='1'
			report "two strobes clash" severity ERROR;
		--core transform input: luma vs chroma predictor
		assert intra4x4_STROBEO/='1' or intra8x8cc_STROBEO/='1'
			report "two strobes clash" severity ERROR;
		--no unknown bits on the cavlc input
		assert not is_x(cavlc_VIN);
	end if;
end process;
	--
-- Verbose trace, active only when the constant `verbose` is true.
-- On each rising edge of CLK where something is going into tobytes, or the
-- cavlc state (current or delayed by one CLK) is not idle, one line is
-- printed: a tag naming the source (header or the delayed cavlc state),
-- followed - when tobytes_VALID is set - by the code length VL and the VL
-- low-order bits of VE, most significant first.
-- The bit dump is bounded by tobytes_ve'high so that every bit VE actually
-- carries is printed; only positions beyond the vector are shown as 0.
process(CLK)		--verbose output: stuff going to tobytes with info about it
	variable sout : line;
	variable n : integer;
	variable delayed_XSTATE : std_logic_vector(2 downto 0) := b"000";
	variable mbnumber : integer := 0;
	variable blocknumber : integer := 1;
	--only referenced by the commented-out breakpoint code below
	variable tmatch : boolean;
	variable zmatch : boolean;
	--true when this clock produces a trace line
	variable active : boolean;
	--
	constant CAVLCSTATE_IDLE   : std_logic_vector(2 downto 0) := b"000";
	constant CAVLCSTATE_READ   : std_logic_vector(2 downto 0) := b"001";
	constant CAVLCSTATE_CTOKEN : std_logic_vector(2 downto 0) := b"010";
	constant CAVLCSTATE_T1SIGN : std_logic_vector(2 downto 0) := b"011";
	constant CAVLCSTATE_COEFFS : std_logic_vector(2 downto 0) := b"100";
	constant CAVLCSTATE_TZEROS : std_logic_vector(2 downto 0) := b"101";
	constant CAVLCSTATE_RUNBF  : std_logic_vector(2 downto 0) := b"110";
begin
	if rising_edge(CLK) and verbose then
		active := tobytes_VALID='1' or cavlc_XSTATE/=CAVLCSTATE_IDLE or delayed_XSTATE/=CAVLCSTATE_IDLE;
		if active then
			if header_VALID='1' then
				--first header after any cavlc blocks starts a new macroblock
				if blocknumber > 0 then
					write(sout,string'("**** Macroblock "));
					write(sout,mbnumber);
					writeline(output,sout);
					blocknumber := 0;
					mbnumber := mbnumber + 1;
				end if;
				write(sout,string'("<HEAD>"));
			elsif delayed_XSTATE = CAVLCSTATE_IDLE then	--DEBUG OUTPUT INTERNAL delayed_XSTATES
				write(sout,string'("IDLE  "));
			elsif delayed_XSTATE = CAVLCSTATE_CTOKEN then
				--coeff token marks the start of a new block
				write(sout,string'("Block "));
				write(sout,blocknumber);
				writeline(output,sout);
				blocknumber := blocknumber + 1;
				write(sout,string'("CTOKEN"));
--				if conv_integer(tobytes_vl)=6 and conv_integer(tobytes_ve)=10 then
--					tmatch := true;
--				else
--					tmatch := false;
--				end if;
			elsif delayed_XSTATE = CAVLCSTATE_T1SIGN then
				write(sout,string'("T1SIGN"));
			elsif delayed_XSTATE = CAVLCSTATE_COEFFS then
				write(sout,string'("COEFFS"));
			elsif delayed_XSTATE = CAVLCSTATE_TZEROS then
				write(sout,string'("TZEROS"));
--				if conv_integer(tobytes_vl)=5 and conv_integer(tobytes_ve)=1 then
--					zmatch := true;
--				else
--					zmatch := false;
--				end if;
			elsif delayed_XSTATE = CAVLCSTATE_RUNBF then
				write(sout,string'("RUNBF "));
			else
				write(sout,string'("????  "));
			end if;
		end if;
		if tobytes_VALID='1' then
			--output on VL/VE
			write(sout,string'(" out "));
			n := conv_integer(tobytes_vl);
			if n<10 then write(sout,string'(" ")); end if;
			write(sout,n);
			write(sout,string'(" "));
			for i in n-1 downto 0 loop
				if i > tobytes_ve'high then
					--VL asks for more bits than VE holds: pad with 0
					write(sout,0);
				else
					write(sout,conv_integer(tobytes_ve(i)));
				end if;
			end loop;
		end if;
		if active then
			writeline(output,sout);
		end if;
		delayed_XSTATE := cavlc_XSTATE;
--				assert not (tmatch and zmatch) report "BKPT" severity error;
	end if;--clk and verbose
end process;
	--
-- Trace / self-check process clocked by CLK2.
--
-- When "verbose2" is true, every rising edge of CLK2 appends the values seen on
-- the pipeline taps to one text line per stream, and flushes that line to the
-- simulator console on the first cycle the corresponding strobe/valid is low:
--   intra4x4_strobeo    -> "MB n; b. qp=q" header, "base ..." and "residual ..."
--   intra8x8cc_strobeo  -> "Cn." header, "ccbase ..." and "ccresidual ..."
--   coretransform_valid -> "coeff ..."      (prefixed "cc" for chroma)
--   quantise_valid      -> "quant ..."      (prefixed "cc" for chroma)
--   dequantise_valid    -> "coeff' ..."     (prefixed "cc" for chroma)
--   invtransform_valid  -> "residual' ..."  (prefixed "cc" for chroma)
--   recon_FBSTROBE      -> "recon ..."
--   recon_FBCSTROBE     -> "ccrecon ..."
-- The luma/chroma tag is handed from stage to stage (bcc -> ccc -> qcc -> dqcc -> icc),
-- each stage latching the tag of the previous one when it starts a new line.
-- The same branch also drives the slowcount/slowready signals (see below).
--
-- When "verbose3" is true, the chroma DC values presented on intra8x8cc_dcdatao
-- are stored and compared against the 16th coretransform_ynout beat of each
-- chroma block; a difference is reported as "DC MISMATCH" with severity failure.
process(CLK2)		--verbose2 - residuals, coeffs, ... and verbose 3: dc
	--one line buffer per traced stream, so partially built lines never mix
	variable pout : line;
	variable bout : line;
	variable rout : line;
	variable bcout : line;
	variable rcout : line;
	variable cout : line;
	variable qout : line;
	variable dqout : line;
	variable iout : line;
	variable fout : line;
	variable fcout : line;		--chroma recon line (kept separate from luma "fout")
	variable dcmsg : line;		--DC mismatch message (kept separate from "iout")
	--"line in progress" flags, one per stream
	variable pinit : boolean := false;
	variable binit : boolean := false;
	variable rinit : boolean := false;
	variable bcinit : boolean := false;
	variable rcinit : boolean := false;
	variable cinit : boolean := false;
	variable qinit : boolean := false;
	variable dqinit : boolean := false;
	variable iinit : boolean := false;
	variable finit : boolean := false;
	variable fcinit : boolean := false;
	--chroma tags: true when the data at that stage belongs to a chroma block
	variable bcc : boolean := false;
	variable ccc : boolean := false;
	variable qcc : boolean := false;
	variable dqcc : boolean := false;
	variable icc : boolean := false;
	variable n : integer;
	variable macroblockn : integer := 0;
	variable blockn : integer := 0;
	variable cblockn : integer := 0;
	--
	--verbose3 state: dci = DC values stored, dco = DC values checked,
	--dcs = coretransform beats seen in the current chroma block
	variable dci : integer := 0;
	variable dco : integer := 0;
	variable dcs : integer := 0;
	type Tdctmp is array(3 downto 0) of integer;
	variable dctmp : Tdctmp;
	variable tmp : integer;
	variable dccc : boolean := false;
	--
begin
	if rising_edge(CLK2) and verbose2 then
		--
		--input pixels: printing is disabled, only the line state is tracked
		if intra4x4_strobei='1' then
			if not pinit then
--				write(pout,"pixels");
				pinit := true;
			end if;
			for b in 0 to 3 loop
				n := conv_integer_signed(intra4x4_datai(b*8+7 downto b*8));
--				write(pout," ");
--				write(pout,n);
			end loop;
--			write(pout,";");
		elsif pinit then
--			writeline(output,pout);
			pinit := false;
		end if;
		--
		--luma prediction base; the first beat of a block also prints the block header
		if intra4x4_strobeo='1' then
			if not binit then
				if blockn=0 then
					write(bout,string'("MB "));
					write(bout,macroblockn);
					write(bout,string'("; "));
					macroblockn := macroblockn+1;
				end if;
				write(bout,blockn);
				blockn := blockn+1;
				write(bout,string'(". qp="));
				n := conv_integer(qp);
				write(bout,n);
				writeline(output,bout);
				write(bout,string'("base"));
				binit := true;
			end if;
			for b in 0 to 3 loop
				n := conv_integer(intra4x4_baseo(b*8+7 downto b*8));
				write(bout,string'(" "));
				write(bout,n);
			end loop;
			write(bout,string'(";"));
		elsif binit then
			writeline(output,bout);
			binit := false;
		end if;
		--
		--luma residual (four signed 9-bit fields per beat)
		if intra4x4_strobeo='1' then
			if not rinit then
				write(rout,string'("residual"));
				rinit := true;
			end if;
			for b in 0 to 3 loop
				n := conv_integer_signed(intra4x4_datao(b*9+8 downto b*9));
				write(rout,string'(" "));
				write(rout,n);
			end loop;
			write(rout,string'(";"));
			bcc := false;	--luma
		elsif rinit then
			writeline(output,rout);
			rinit := false;
			if blockn=16 then	--block counter wraps after 16 luma blocks
				blockn := 0;
			end if;
		end if;
		--
		--chroma prediction base, with "Cn." block header on the first beat
		if intra8x8cc_strobeo='1' then
			if not bcinit then
				write(bcout,string'("C"));
				write(bcout,cblockn);
				write(bcout,string'("."));
				cblockn := cblockn+1;
				writeline(output,bcout);
				write(bcout,string'("ccbase"));
				bcinit := true;
			end if;
			for b in 0 to 3 loop
				n := conv_integer(intra8x8cc_baseo(b*8+7 downto b*8));
				write(bcout,string'(" "));
				write(bcout,n);
			end loop;
			write(bcout,string'(";"));
			bcc := true;	--chroma
		elsif bcinit then
			writeline(output,bcout);
			bcinit := false;
			if cblockn=8 then	--block counter wraps after 8 chroma blocks
				cblockn := 0;
			end if;
		end if;
		--
		--chroma residual
		if intra8x8cc_strobeo='1' then
			if not rcinit then
				write(rcout,string'("ccresidual"));
				rcinit := true;
			end if;
			for b in 0 to 3 loop
				n := conv_integer_signed(intra8x8cc_datao(b*9+8 downto b*9));
				write(rcout,string'(" "));
				write(rcout,n);
			end loop;
			write(rcout,string'(";"));
		elsif rcinit then
			writeline(output,rcout);
			rcinit := false;
		end if;
		--
		--core transform output, one coefficient per beat
		if coretransform_valid='1' then
			if not cinit then
				ccc := bcc;		--latch luma/chroma tag from the residual stage
				if ccc then write(cout,string'("cc")); end if;
				write(cout,string'("coeff"));
				cinit := true;
			end if;
			n := conv_integer_signed(coretransform_ynout);
			write(cout,string'(" "));
			write(cout,n);
		elsif cinit then
			writeline(output,cout);
			cinit := false;
		end if;
		--
		--quantiser output
		if quantise_valid='1' then
			if not qinit then
				qcc := ccc;
				if qcc then write(qout,string'("cc")); end if;
				write(qout,string'("quant"));
				qinit := true;
			end if;
			n := conv_integer_signed(quantise_zout);	--12bit
			write(qout,string'(" "));
			write(qout,n);
		elsif qinit then
			writeline(output,qout);
			qinit := false;
		end if;
		--
		--dequantiser output
		if dequantise_valid='1' then
			if not dqinit then
				dqcc := qcc;
				if dqcc then write(dqout,string'("cc")); end if;
				write(dqout,string'("coeff'"));
				dqinit := true;
			end if;
			n := conv_integer_signed(dequantise_wout);
			write(dqout,string'(" "));
			write(dqout,n);
		elsif dqinit then
			writeline(output,dqout);
			dqinit := false;
		end if;
		--
		--inverse transform output (four signed 9-bit fields per beat)
		if invtransform_valid='1' then
			if not iinit then
				icc := dqcc;
				if icc then write(iout,string'("cc")); end if;
				write(iout,string'("residual'"));
				iinit := true;
			end if;
			for b in 0 to 3 loop
				n := conv_integer_signed(invtransform_xout(b*9+8 downto b*9));
				write(iout,string'(" "));
				write(iout,n);
			end loop;
			write(iout,string'(";"));
		elsif iinit then
			writeline(output,iout);
			iinit := false;
		end if;
		--
		--reconstructed luma feedback
		if recon_FBSTROBE='1' then
			if not finit then
				write(fout,string'("recon"));
				finit := true;
			end if;
			for b in 0 to 3 loop
				n := conv_integer(recon_FEEDB(b*8+7 downto b*8));
				write(fout,string'(" "));
				write(fout,n);
			end loop;
			write(fout,string'(";"));
		elsif finit then
			writeline(output,fout);
			finit := false;
		end if;
		--
		--reconstructed chroma feedback; uses its own line buffer so that a luma
		--strobe starting on the cycle the chroma line is flushed cannot be
		--merged into the "ccrecon" line
		if recon_FBCSTROBE='1' then
			if not fcinit then
				write(fcout,string'("ccrecon"));
				fcinit := true;
			end if;
			for b in 0 to 3 loop
				n := conv_integer(recon_FEEDB(b*8+7 downto b*8));
				write(fcout,string'(" "));
				write(fcout,n);
			end loop;
			write(fcout,string'(";"));
		elsif fcinit then
			writeline(output,fcout);
			fcinit := false;
		end if;
		--
		--misc stuff for running slowly
		--to run fast leave "slowready" always asserted
		--slowready is high for one CLK2 cycle each time slowcount reaches 0 and
		--is reloaded with 64; NOTE(review): this only runs while verbose2 is true
		if slowcount=0 then
			slowcount <= 64;
			slowready <= '1';
		else
			slowcount <= slowcount-1;
			slowready <= '0';
		end if;
	end if;--clk2 and verbose2
	--
	if rising_edge(CLK2) and verbose3 then
		if top_NEWLINE='1' then
			dci := 0;
			dco := 0;
			dcs := 0;
		end if;
		if intra8x8cc_dcstrobeo='1' then
			--check the index before using it: dctmp only has entries 0..3
			assert dci<4 report "more than 4 chroma DC values received" severity error;
			if dci<4 then
				dctmp(dci) := conv_integer_signed(intra8x8cc_dcdatao);
				dci := dci + 1;
			end if;
			assert dco=0 and dcs=0;
		end if;
		--remember whether the most recent prediction output was chroma or luma
		if intra8x8cc_strobeo='1' then
			dccc := true;
		elsif intra4x4_strobeo='1' then
			dccc := false;
		end if;
		if coretransform_valid='0' then
			dcs := 0;
		end if;
		if coretransform_valid='1' and dccc then
			assert dci=4;
			dcs := dcs+1;
			if dcs=16 then	--last of 16 beats
				dcs := 0;
				tmp := conv_integer_signed(coretransform_ynout);
				if tmp /= dctmp(dco) then
					--print first: the failure assertion normally stops the simulation
					write(dcmsg,string'("**DC MISMATCH**"));
					writeline(output,dcmsg);
					assert false report "DC MISMATCH" severity failure;
				end if;
				dco := dco+1;
				if dco=4 then
					dco := 0;
					dci := 0;
				end if;
			end if;
		end if;
	end if;--clk2 and verbose3
end process;
	--
-- Reconstructed-frame capture, clocked by CLK2 and active when "dumprecon" or
-- "computesnr" is true.
--
-- Each recon_FBSTROBE beat stores four luma pixels (recon_FEEDB, lowest byte
-- first) as one row of a 4x4 block; each recon_FBCSTROBE beat does the same for
-- chroma, alternating between the U and V planes every 8x8 block.
-- The x/y (luma) and cx/cy/cuv (chroma) counters follow the order in which the
-- encoder emits data: rows of a 4x4 block, 4x4 blocks in a 2x2 pattern, those
-- groups again in a 2x2 pattern (luma only), then macroblocks left to right and
-- top to bottom.
--
-- Once both planes are complete and tobytes_DONE is high:
--   * if dumprecon, the frame is written to "test_rec.yuv" as Y, then U, then V;
--   * if computesnr, the SNR of each plane against yvideo/uvideo/vvideo is
--     printed on one console line ("oo" when the planes are identical).
process(CLK2)		--dump reconstructed file in YUV
	file outb : bytefile open write_mode is "test_rec.yuv";
	variable sout : line;
	variable yrvideo : Tvideoframe;
	variable urvideo : Tchromaframe;
	variable vrvideo : Tchromaframe;
	variable x : integer := 0;
	variable y : integer := 0;
	variable cx : integer := 0;
	variable cy : integer := 0;
	variable cuv : integer := 0;	--0 = writing U plane, 1 = writing V plane
	--for SNR:
	variable diff: integer := 0;
	variable ssqdiff: real := 0.0;
	--
	-- Appends "<prefix><dB>.<tenth> dB<suffix>" to l, or "<prefix>oo dB<suffix>"
	-- when the sum of squared differences ssq is zero.
	-- npix is the number of pixels in the plane.
	procedure write_snr(
		variable l : inout line;
		constant prefix : in string;
		constant suffix : in string;
		constant ssq : in real;
		constant npix : in real
	) is
		constant sqmaxval : real := real(255*255);
		variable snr : real;
		variable isnr : integer;
		variable fsnr : integer;
	begin
		if ssq /= 0.0 then
			snr := 10.0*log10(sqmaxval*npix/ssq);		--same as JM ref software
			fsnr := integer(round(snr*10.0));	--centibels
			isnr := fsnr/10;			--decibels
			fsnr := fsnr - isnr*10;		--tenths of dB digit
			write(l,prefix);
			write(l,isnr);
			write(l,string'("."));
			write(l,fsnr);
			write(l,string'(" dB" & suffix));
		else
			write(l,string'(prefix & "oo dB" & suffix));
		end if;
	end procedure;
begin
	if rising_edge(CLK2) and (dumprecon or computesnr) then
		if recon_FBSTROBE='1' then
			--four horizontally adjacent luma pixels, lowest byte is leftmost
			yrvideo(x,y) := character'VAL(conv_integer(recon_FEEDB(7 downto 0)));
			yrvideo(x+1,y) := character'VAL(conv_integer(recon_FEEDB(15 downto 8)));
			yrvideo(x+2,y) := character'VAL(conv_integer(recon_FEEDB(23 downto 16)));
			yrvideo(x+3,y) := character'VAL(conv_integer(recon_FEEDB(31 downto 24)));
			--advance to the next row; each completed level steps to the next
			--4x4 block, 8x8 group, 16x16 macroblock and finally macroblock row
			y := y + 1;
			if y mod 4 = 0 then
				y := y - 4;
				x := x + 4;
				if x mod 8 = 0 then
					x := x - 8;
					y := y + 4;
					if y mod 8 = 0 then
						y := y - 8;
						x := x + 8;
						if x mod 16 = 0 then
							x := x - 16;
							y := y + 8;
							if y mod 16 = 0 then
								y := y - 16;
								x := x + 16;
								if x = IMGWIDTH then
									x := 0;
									y := y + 16;
								end if;
							end if;
						end if;
					end if;
				end if;
			end if;
		end if;
		if recon_FBCSTROBE='1' then
			if cuv = 0 then
				urvideo(cx,cy) := character'VAL(conv_integer(recon_FEEDB(7 downto 0)));
				urvideo(cx+1,cy) := character'VAL(conv_integer(recon_FEEDB(15 downto 8)));
				urvideo(cx+2,cy) := character'VAL(conv_integer(recon_FEEDB(23 downto 16)));
				urvideo(cx+3,cy) := character'VAL(conv_integer(recon_FEEDB(31 downto 24)));
			else
				vrvideo(cx,cy) := character'VAL(conv_integer(recon_FEEDB(7 downto 0)));
				vrvideo(cx+1,cy) := character'VAL(conv_integer(recon_FEEDB(15 downto 8)));
				vrvideo(cx+2,cy) := character'VAL(conv_integer(recon_FEEDB(23 downto 16)));
				vrvideo(cx+3,cy) := character'VAL(conv_integer(recon_FEEDB(31 downto 24)));
			end if;
			--same stepping as luma, but an 8x8 block is sent for U and then for V
			--at the same position before moving to the next macroblock
			cy := cy + 1;
			if cy mod 4 = 0 then
				cy := cy - 4;
				cx := cx + 4;
				if cx mod 8 = 0 then
					cx := cx - 8;
					cy := cy + 4;
					if cy mod 8 = 0 then
						cy := cy - 8;
						cuv := cuv + 1;
						if cuv = 2 then
							cuv := 0;
							cx := cx + 8;
							if cx = IMGWIDTH/2 then
								cx := 0;
								cy := cy + 8;
							end if;
						end if;
					end if;
				end if;
			end if;
		end if;
		--frame complete: both write positions are past the last row and the
		--byte stream for the frame has been finished
		if y=IMGHEIGHT and cy=IMGHEIGHT/2 and tobytes_DONE='1' then
			if dumprecon then
				--now dump file
				for row in 0 to IMGHEIGHT-1 loop
					for col in 0 to IMGWIDTH-1 loop
						write(outb, yrvideo(col,row));
					end loop;
				end loop;
				for row in 0 to IMGHEIGHT/2-1 loop
					for col in 0 to IMGWIDTH/2-1 loop
						write(outb, urvideo(col,row));
					end loop;
				end loop;
				for row in 0 to IMGHEIGHT/2-1 loop
					for col in 0 to IMGWIDTH/2-1 loop
						write(outb, vrvideo(col,row));
					end loop;
				end loop;
				write(sout,IMGHEIGHT*IMGWIDTH*3/2);
				write(sout,string'(" bytes written to test_rec.yuv"));
				writeline(output,sout);
			end if;
			--done frame
			x := 0;
			y := 0;
			cx := 0;
			cy := 0;
			cuv := 0;
			--SNR computations (only when requested)
			if computesnr then
				ssqdiff := 0.0;
				for row in 0 to IMGHEIGHT-1 loop
					for col in 0 to IMGWIDTH-1 loop
						diff := character'POS(yrvideo(col,row)) - character'POS(yvideo(col,row));
						ssqdiff := real(diff*diff) + ssqdiff;
					end loop;
				end loop;
				write_snr(sout, "SNR Y: ", "; ", ssqdiff, real(IMGHEIGHT*IMGWIDTH));
				--
				ssqdiff := 0.0;
				for row in 0 to IMGHEIGHT/2-1 loop
					for col in 0 to IMGWIDTH/2-1 loop
						diff := character'POS(urvideo(col,row)) - character'POS(uvideo(col,row));
						ssqdiff := real(diff*diff) + ssqdiff;
					end loop;
				end loop;
				write_snr(sout, "U: ", "; ", ssqdiff, real(IMGHEIGHT*IMGWIDTH/4));
				--
				ssqdiff := 0.0;
				for row in 0 to IMGHEIGHT/2-1 loop
					for col in 0 to IMGWIDTH/2-1 loop
						diff := character'POS(vrvideo(col,row)) - character'POS(vvideo(col,row));
						ssqdiff := real(diff*diff) + ssqdiff;
					end loop;
				end loop;
				write_snr(sout, "V: ", "", ssqdiff, real(IMGHEIGHT*IMGWIDTH/4));
				writeline(output,sout);
			end if;
		end if;--y
	end if;--rising_edge CLK
end process;
	--
-- Byte-stream writer for "test.264".
--
-- At start-up the low HDSIZE bytes of "hd" are written, most significant byte
-- first (the leading x"AA" byte of the constant lies above that range and is
-- not written). The process then loops once per rising edge of CLK:
--   * while tobytes_STROBE is high, tobytes_BYTE is appended to the file;
--   * when tobytes_DONE is high, the byte count, compression ratio and QP are
--     printed to the console, the count is cleared and the bytes 00 00 00 01
--     are appended to the file.
process		--output
	file outb : bytefile open write_mode is "test.264";
	variable c: character;
	--constant hd : std_logic_vector(215 downto 0) := x"AA0000000167420033da0055007FE40000000168ce388000000001";--5440X4080
	--constant hd : std_logic_vector(215 downto 0) := x"AA0000000167420033da00FA014F900000000168ce388000000001";--4000X2672
	--constant hdsize : integer := 26;
	constant hd : std_logic_vector(199 downto 0) := x"AA0000000167420028da0582590000000168ce388000000001";--352x288
	--constant hd : std_logic_vector(199 downto 0) := x"AA0000000167420028da0504640000000168ce388000000001";--320x128
	constant hdsize : integer := 24;
	--
	variable count: integer := 0;	--bytes written since the last tobytes_DONE
	variable sout : line;
begin
	for i in hdsize-1 downto 0 loop	--write HDSIZE bytes of SPS and PPS
		c := character'VAL(conv_integer(hd(i*8+7 downto i*8)));
		write(outb,c);
	end loop;
	loop
		if tobytes_STROBE='1' then
			write(outb, character'VAL(CONV_INTEGER(tobytes_BYTE)));
			count := count + 1;
		end if;
		if tobytes_DONE='1' then
			write(sout,count);write(sout,string'(" bytes in NAL ("));
			if count > 0 then
				write(sout,IMGHEIGHT*IMGWIDTH*3/2/count);write(sout,string'(":1 compression) using QP: "));
			else
				--no bytes were strobed since the last DONE (e.g. DONE held for more
				--than one cycle): the ratio is undefined, avoid dividing by zero
				write(sout,string'("n/a compression) using QP: "));
			end if;
			write(sout,conv_integer(QP));
			writeline(output,sout);
			count := 0;
			write(outb, character'VAL(0));
			write(outb, character'VAL(0));
			write(outb, character'VAL(0));
			write(outb, character'VAL(1));
		end if;
		wait until rising_edge(CLK);
	end loop;
end process;
	--
end sim;
